In this experiment, we had only 1 library, called “D33_D45_D47_CD4_Tcells”.
The hastag antibodies used were:
# lib name
hto_used = samplesheet %>%
filter(Library_name == lib_name) %>%
pull(Antibody) %>%
unique
hto_used
## [1] "TotalSeq_C_2" "TotalSeq_C_5" "TotalSeq_C_6" "TotalSeq_C_7"
## [5] "TotalSeq_C_8" "TotalSeq_C_9" "TotalSeq_C_10" "TotalSeq_C_11"
## [9] "TotalSeq_C_12" "TotalSeq_C_3" "TotalSeq_C_4"
Number of features (genes) and samples (cells) in this library
# Load the PBMC dataset
#data <- Read10X(data.dir = paste0("../counts/",lib_name,"/filtered_feature_bc_matrix/"))
# for CellBender, do on cmd:
# ptrepack --complevel 5 raw_EXP2_l3_cellbender_out.h5:/matrix raw_cellbender_EXP2_l3_out_seurat.h5:/matrix
# output from Cellbender:
lib_name = "D33_D45_D47_CD4_Tcells"
data <- Read10X(data.dir = paste0("../Data/",lib_name,"/sample_filtered_feature_bc_matrix/"))
#data <- Read10X_h5(filename = "../sample_filtered_feature_bc_matrix.h5")
gex <- data[[1]]
hto <- data[["Antibody Capture"]][rownames(data[["Antibody Capture"]]) %in% hto_used, ]
# remove SCoV2 genes
# plasmo.gex <- gex[grep(rownames(gex), pattern = "^gene-PF3D7"),] # assay with Plasmodium genes
# human.gex <- gex[grep(rownames(gex), pattern = "^gene-PF3D7", invert = T),]
# Initialize the Seurat object with the raw (non-normalized data).
#mono <- CreateSeuratObject(counts = gex, project = lib_name, min.cells = 3, min.features = 200)
options(Seurat.object.assay.calcn = TRUE)
hustle <- CreateSeuratObject(counts = gex, project = lib_name, min.cells = 3, min.features = 10)
hustle
## An object of class Seurat
## 14314 features across 2455 samples within 1 assay
## Active assay: RNA (14314 features, 0 variable features)
## 1 layer present: counts
head(hustle@meta.data, 5)
Demultiplex: Assign donor to cells - singlets, doublets, negatives using vireo
# install / upgrade vireoSNP using pip install --upgrade --no-deps vireoSNP
# vireo to check installation errors
# download all cellSNP file from library -> save vcf file as .gz, leave others un-gzipped
# error: index out of range -> solved by downloading the files again and redoing vireo
# command line: vireo -c cellSNP_mat/ -N 5 -o vireo_result/ for 5 donors
# add vireo output to meta data
hustle.demul <- hustle
snp <- read.delim(paste("../Data/",lib_name,"/vireo_result/donor_ids.tsv", sep = ""))
meta <- hustle.demul@meta.data %>%
tibble::rownames_to_column("cell") %>%
left_join(snp)
table(meta$donor_id)
##
## donor0 donor1 donor2 doublet unassigned
## 259 1577 337 98 171
# remove doublets and unassigned
hustle.demul@meta.data <- meta
hustle.demul@meta.data <- hustle.demul@meta.data %>%
as.data.frame() %>%
tibble::column_to_rownames("cell")
## didn't need the next line for CellBender output:
Idents(hustle.demul) <- "donor_id"
hustle.demul <- subset(x = hustle.demul, idents = c("doublet", "unassigned"), invert = T)
hustle <- hustle.demul
# identical(meta$cell, rownames(mono@meta.data))
# [1] TRUE
#Cell Hashing uses oligo-tagged antibodies against ubuquitously expressed surface proteins to place a “sample barcode” on each single cell, enabling different samples to be multiplexed together and run in a single experiment.
# Select cell barcodes detected by both RNA and HTO In the example datasets we have already
# filtered the cells for you, but perform this step for clarity.
hustle.singlet.donor <- hustle
joint.bcs <- intersect(rownames(hustle.singlet.donor@meta.data), colnames(hto))
# identical(colnames(pbmc@assays$RNA), rownames(pbmc@meta.data))
# [1] TRUE
# Subset RNA and HTO counts by joint cell barcodes
#pbmc@assays$RNA <- pbmc@assays$RNA[, joint.bcs]
hustle.hto <- as.matrix(hto[,joint.bcs])
## # Add HTO data as a new assay independent from RNA
Idents(hustle.singlet.donor) <- rownames(hustle.singlet.donor@meta.data)
hustle.singlet.donor[["HTO"]] <- CreateAssayObject(counts = hustle.hto)
# Normalize HTO data, here we use centered log-ratio (CLR) transformation
hustle.singlet.donor <- NormalizeData(hustle.singlet.donor, assay = "HTO", normalization.method = "CLR")
# If you have a very large dataset we suggest using k_function = 'clara'. This is a k-medoid
# clustering function for large applications You can also play with additional parameters (see
# documentation for HTODemux()) to adjust the threshold for classification Here we are using
# the default settings
hustle.singlet.donor <- HTODemux(hustle.singlet.donor, assay = "HTO", positive.quantile = 0.99) #, positive.quantile = 0.99
#hustle.singlet.donor <- MULTIseqDemux(hustle.singlet.donor, assay = "HTO")
# Global classification results
table(hustle.singlet.donor$HTO_classification.global)
##
## Doublet Negative Singlet
## 640 106 1440
#table(hustle.singlet.donor$MULTI_classification)
Idents(hustle.singlet.donor) <- "HTO_classification.global"
#Idents(hustle.singlet.donor) <- "MULTI_classification"
VlnPlot(hustle.singlet.donor, features = c("nFeature_RNA", "nCount_RNA"), pt.size = 0.1, log = F)
#VlnPlot(hustle.singlet.donor, features = c("percent.mt", "percent.plasmo"), pt.size = 0.1, log = TRUE)
hustle.h <- hustle.singlet.donor
# First, we will remove negative cells from the object
# if(any(hustle.h$MULTI_classification == "Negative") == T)
# hustle.h <- subset(hustle.h, idents = c("Negative"), invert = TRUE)
if(any(hustle.h$HTO_classification.global == "Negative") == T)
hustle.h <- subset(hustle.h, idents = c("Negative"), invert = TRUE)
# Calculate a tSNE embedding of the HTO data
DefaultAssay(hustle.h) <- "HTO"
hustle.h <- ScaleData(hustle.h, features = rownames(hustle.h),
verbose = FALSE)
hustle.h <- RunPCA(hustle.h, features = rownames(hustle.h), approx = FALSE)
hustle.h <- RunTSNE(hustle.h, check_duplicates = F)
Idents(hustle.h) <- 'HTO_classification'
#Idents(hustle.h) <- 'MULTI_classification'
#DimPlot(hustle.h)
# To increase the efficiency of plotting, you can subsample cells using the num.cells argument
#HTOHeatmap(hustle, assay = "HTO", ncells = 500)
Features and samples after assigning cells to donors and retaining only singlets
Idents(hustle.singlet.donor) = "HTO_classification"
# Extract singlets
#hashtags <- gsub("Total_Seq", "TotalSeq", hto_used)
hashtags <- gsub("_", "-", hto_used)
# hustle.singlet.donor@meta.data <- hustle.singlet.donor@meta.data %>%
# mutate(Cell_state = case_when(
# as.character(MULTI_classification) %in% hashtags ~ "Singlet",
# .default = MULTI_classification
# ))
#Idents(hustle.singlet.donor) <- "Cell_state"
#hustle.singlet <- subset(hustle.singlet.donor, subset = Cell_state == "Singlet")
# For HTODemux:
hustle.singlet.donor = hustle.h
Idents(hustle.singlet.donor) = "HTO_classification.global"
hustle.singlet <- subset(hustle.singlet.donor, idents = "Singlet")
hustle.singlet
## An object of class Seurat
## 14325 features across 1440 samples within 2 assays
## Active assay: HTO (11 features, 0 variable features)
## 3 layers present: counts, data, scale.data
## 1 other assay present: RNA
## 2 dimensional reductions calculated: pca, tsne
# donorid <- samplesheet %>%
# filter(Library_name == lib_name) %>%
# dplyr::rename(HTO_classification = Antibody) %>%
# #mutate(HTO_classification = gsub("_", "-", HTO_classification)) %>%
# dplyr::rename(Group = `Patient Group`) %>%
# select(Group, HTO_classification) %>%
# distinct()
# donorid <- samplesheet %>%
# filter(Library_name == lib_name) %>%
# dplyr::rename(MULTI_classification = Antibody) %>%
# mutate(MULTI_classification = gsub("_", "-", MULTI_classification)) %>%
# dplyr::rename(Group = `Patient Group`) %>%
# select(Group, MULTI_classification) %>%
# distinct()
hustle.demul <- hustle.singlet
# hustle.demul@meta.data <- hustle.demul@meta.data %>%
# left_join(., donorid)
rownames(hustle.demul@meta.data) <- rownames(hustle.singlet@meta.data)#rownames(hustle.singlet@meta.data)
hustle <- hustle.singlet
Idents(hustle) <- "orig.ident"
hustle[["percent.mt"]] <- PercentageFeatureSet(hustle, pattern = "^MT-")
QC for number of RNA molecules (nCount_RNA), number of features (nFeature_RNA), mitochondrial RNA percent denoting dead cells (percent.mt)
# Visualize QC metrics as a violin plot
DefaultAssay(hustle) <- "RNA"
VlnPlot(hustle, features=c("nCount_RNA", "nFeature_RNA"), ncol = 2)
ggplot(hustle@meta.data, aes(x = nFeature_RNA)) + geom_histogram(binwidth = 50)
#ggplot(mono@meta.data, aes(x = nFeature_RNA)) + geom_histogram(binwidth = 20) + xlim(c(0, 600))
ggplot(hustle@meta.data, aes(x = nFeature_RNA)) +
geom_histogram(binwidth = 50) +
facet_wrap( ~ donor_id)
ggplot(hustle@meta.data, aes(x = nCount_RNA)) + geom_histogram(binwidth = 50)
#ggplot(mono@meta.data, aes(x = nCount_RNA)) + geom_histogram(binwidth = 50) + xlim(c(0, 5000))
hustle[["percent.mt"]] <- PercentageFeatureSet(hustle, pattern = "^MT-")
VlnPlot(hustle, features=c("percent.mt"), ncol = 2)
ggplot(hustle@meta.data, aes(x = percent.mt)) + geom_histogram(binwidth = 1)
#ggplot(mono@meta.data, aes(x = percent.Ribosomal)) + geom_histogram(binwidth = 1)
#ggplot(mono@meta.data, aes(x = percent.Largest.Gene)) + geom_histogram(binwidth = 1)
deadcell_nF_lowercutoff <- 250
deadcell_nF_uppercutoff <- 1500
# final for human reads in EXP1_l1: nF = 500 - 6000
#deadcell_nC_lowercutoff <- 500
deadcell_mt_cutoff <- 12.5
hustle.meta <- hustle@meta.data %>%
mutate(is.dead = case_when(
nFeature_RNA >= deadcell_nF_lowercutoff &
nFeature_RNA <= deadcell_nF_uppercutoff &
percent.mt <= deadcell_mt_cutoff ~ "FALSE",
TRUE ~ "TRUE"))
table(hustle.meta$is.dead)
##
## FALSE TRUE
## 1297 143
ggplot(hustle.meta, aes(x = nFeature_RNA, fill = factor(is.dead))) +
geom_histogram(bins = 50, position = "identity", alpha = 0.5)
ggplot(hustle.meta, aes(x = nCount_RNA, fill = factor(is.dead))) +
geom_histogram(bins = 50, position = "identity", alpha = 0.5)
ggplot(hustle.meta, aes(x = percent.mt, fill = factor(is.dead))) +
geom_histogram(bins = 50, position = "identity", alpha = 0.5)
hustle1 <- subset(hustle, subset = nFeature_RNA > deadcell_nF_lowercutoff & nFeature_RNA < deadcell_nF_uppercutoff & percent.mt < deadcell_mt_cutoff) #nCount_RNA >= 1000 &
hustle1
## An object of class Seurat
## 14325 features across 1297 samples within 2 assays
## Active assay: RNA (14314 features, 0 variable features)
## 1 layer present: counts
## 1 other assay present: HTO
## 2 dimensional reductions calculated: pca, tsne
VlnPlot(hustle1, features=c("nCount_RNA", "nFeature_RNA"), group.by = "orig.ident", ncol = 2)
hustle = hustle1
#save.image("malaria_monocyte_QC.RData")
Assigning hashtags and peptide pools to cells
# assigning cell to donor and pool
hustle@meta.data <- hustle@meta.data %>%
mutate(Donor_expt = case_when(
HTO_classification == "TotalSeq-C-2" ~ "D33",
HTO_classification == "TotalSeq-C-3" ~ "D45",
HTO_classification == "TotalSeq-C-4" ~ "D47",
.default = ""
)) %>%
mutate(pep.pool = case_when(
HTO_classification == "TotalSeq-C-5" ~ "P2",
HTO_classification == "TotalSeq-C-6" ~ "P3",
HTO_classification == "TotalSeq-C-7" ~ "P4",
HTO_classification == "TotalSeq-C-8" ~ "P5",
HTO_classification == "TotalSeq-C-9" ~ "P6",
HTO_classification == "TotalSeq-C-10" ~ "P7",
HTO_classification == "TotalSeq-C-11" ~ "P8",
HTO_classification == "TotalSeq-C-12" ~ "P9",
HTO_classification == "TotalSeq-C-2" ~ "P1",
HTO_classification == "TotalSeq-C-3" ~ "P1",
HTO_classification == "TotalSeq-C-4" ~ "P1",
.default = ""
))
# get donor id from SNP for assigning D33, D45, D47
donorid_snp_hto <- hustle@meta.data %>%
select(Donor_expt, donor_id) %>%
filter(Donor_expt != "") %>%
filter(!donor_id %in% c("doublet", "unassigned")) %>%
group_by(donor_id) %>%
count(Donor_expt, donor_id)
#Put them back in hustle meta data
hustle@meta.data <- hustle@meta.data %>%
mutate(Donor_expt = case_when(
HTO_classification == "TotalSeq-C-2" ~ "D33",
HTO_classification == "TotalSeq-C-3" ~ "D45",
HTO_classification == "TotalSeq-C-4" ~ "D47",
donor_id == "donor0" ~ "D33",
donor_id == "donor1" ~ "D47",
donor_id == "donor2" ~ "D45",
.default = ""
)) %>%
filter(!is.na(donor_id)) %>%
mutate(donor_pep.pool = paste(Donor_expt, pep.pool, sep = "_"))
# # remove left over unassigned and doublet "cells"
# Idents(hustle) <- c("donor_id")
# hustle <- subset(hustle, idents = c("doublet", "unassigned"), invert = T)
table(hustle$Donor_expt)
##
## D33 D45 D47
## 160 202 932
table(hustle$pep.pool)
##
## P1 P2 P3 P4 P5 P6 P7 P8 P9
## 269 189 169 161 59 119 135 56 137
table(hustle$Donor_expt, hustle$pep.pool)
##
## P1 P2 P3 P4 P5 P6 P7 P8 P9
## D33 22 28 19 17 12 13 8 23 18
## D45 59 24 18 11 34 18 9 17 12
## D47 188 137 132 133 13 88 118 16 107
table(hustle$donor_pep.pool)
##
## D33_P1 D33_P2 D33_P3 D33_P4 D33_P5 D33_P6 D33_P7 D33_P8 D33_P9 D45_P1 D45_P2
## 22 28 19 17 12 13 8 23 18 59 24
## D45_P3 D45_P4 D45_P5 D45_P6 D45_P7 D45_P8 D45_P9 D47_P1 D47_P2 D47_P3 D47_P4
## 18 11 34 18 9 17 12 188 137 132 133
## D47_P5 D47_P6 D47_P7 D47_P8 D47_P9
## 13 88 118 16 107
Cells with assigned clonotypes
add_clonotype <- function(seurat_obj){
tcr <- read.csv("../Data/D33_D45_D47_CD4_Tcells/filtered_contig_annotations.csv")
# Clonotype-centric info.
clono <- read.csv("../Data/D33_D45_D47_CD4_Tcells/clonotypes.csv")
# Subsets so only the first line of each barcode is kept,
# as each entry for given barcode will have same clonotype.
tcr <- tcr %>%
distinct(barcode, .keep_all = T) %>%
# Only keep the barcode and clonotype columns.
# We'll get additional clonotype info from the clonotype table.
select(barcode, raw_clonotype_id) %>%
rename(clonotype_id = raw_clonotype_id) %>%
# Slap the AA sequences onto our original table by clonotype_id.
merge(., clono[, c("clonotype_id", "cdr3s_aa")]) %>%
tibble::column_to_rownames("barcode")
seurat_obj <- seurat_obj[,rownames(tcr)]
# Add to the Seurat object's metadata.
clono_seurat <- AddMetaData(object=seurat_obj, metadata=tcr)
return(clono_seurat)
}
Idents(hustle) <- rownames(hustle@meta.data)
hustle <- add_clonotype(hustle)
table(!is.na(hustle$clonotype_id))
##
## TRUE
## 871
# FALSE TRUE
# 515 834
# keep only the clonotypes
hustle <- subset(hustle, cells = colnames(hustle)[!is.na(hustle$clonotype_id)])
hustle
## An object of class Seurat
## 14325 features across 871 samples within 2 assays
## Active assay: RNA (14314 features, 0 variable features)
## 1 layer present: counts
## 1 other assay present: HTO
## 2 dimensional reductions calculated: pca, tsne
Idents(hustle) <- "HTO_maxID"
RidgePlot(hustle, assay = "HTO", features = rownames(hustle@assays[["HTO"]])[1:5])
RidgePlot(hustle, assay = "HTO", features = rownames(hustle@assays[["HTO"]])[6:11])
Pick number of principal components from elbow plot
hustle <- NormalizeData(hustle, normalization.method = "LogNormalize", scale.factor = 10000)
hustle <- FindVariableFeatures(hustle, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(hustle)
hustle <- ScaleData(hustle, features = all.genes)
hustle <- RunPCA(hustle, features = VariableFeatures(object = hustle))
hustle <- JackStraw(hustle, num.replicate = 70)
hustle <- ScoreJackStraw(hustle, dims = 1:10)
#JackStrawPlot(hustle, dims = 1:10)
ElbowPlot(hustle)
use.pcs = 1:15
8482 -> saved.seed
hustle <- RunTSNE(hustle, dims=1:use.pcs, seed.use = saved.seed, perplexity=100)
hustle <- FindNeighbors(hustle, dims = use.pcs)
hustle <- FindClusters(hustle, resolution = c(0.9))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 871
## Number of edges: 35834
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6370
## Number of communities: 7
## Elapsed time: 0 seconds
hustle <- RunUMAP(hustle, dims = use.pcs)
hustle <- RunAzimuth(hustle, reference = "pbmcref")
Visualise cells with cassigned clonotypes
DimPlot(hustle, reduction = "umap", label = TRUE)
DimPlot(hustle, reduction = "umap", group.by = "clonotype_id", label = F) + NoLegend()
DimPlot(hustle, reduction = "umap", group.by = "predicted.celltype.l2", label = T) + NoLegend()
T- and B-cell markers
t_cell_markers <- c("CD3D","CD3E")
b_cell_markers <- c("CD79A", "CD79B")
FeaturePlot(hustle, features = t_cell_markers)
FeaturePlot(hustle, features = b_cell_markers)
#table(!is.na(hustle$clonotype_id),hustle$seurat_clusters)
Gene markers for clusters
markers_all = FindAllMarkers(hustle,genes.use = VariableFeatures(hustle),
only.pos = F,
min.pct = 0.25,
thresh.use = 0.25)
DT::datatable(markers_all)
Gene markers unique to each cluster
markers_all_single <- markers_all[markers_all$gene %in% names(table(markers_all$gene))[table(markers_all$gene) == 1],]
DT::datatable(markers_all_single)
top15_markers <- markers_all %>%
group_by(cluster) %>%
slice_max(n = 20, order_by = abs(avg_log2FC))
DT::datatable(top15_markers)
top5 <- markers_all_single %>% group_by(cluster) %>% top_n(5, avg_log2FC)
dim(top5)
## [1] 35 7
Heatmap with top 5 markers
DoHeatmap(
object = hustle,
features = top5$gene
)
# write.table(top15_markers, "hustleseq_top15_markers_6clusters.txt", row.names = F, quote = F)
# write.table(markers_all_single, "hustleseq_unique_markers_6clusters.txt", row.names = F, quote = F)
Cells mapped to specific epitopes
hustle.meta <- data.frame(hustle@reductions[["umap"]]@cell.embeddings) %>%
tibble::rownames_to_column("Cell") %>%
mutate(clonotype_id = substr(hustle@meta.data$clonotype_id, 9, nchar(hustle@meta.data$clonotype_id))) %>%
mutate(clone_colour = case_when(
clonotype_id == "clonotype1" ~ "cornflowerblue",
clonotype_id == "clonotype2" ~ "cornflowerblue",
clonotype_id == "clonotype4" ~ "salmon",
clonotype_id == "clonotype6" ~ "lightgreen",
clonotype_id == "clonotype9" ~ "navyblue",
clonotype_id == "clonotype24" ~ "darkred",
clonotype_id == "clonotype43" ~ "yellow4",
.default = "gray80"
)) %>%
mutate(peptide.pool = hustle$pep.pool) %>%
mutate(pep.pool.colour = case_when(
peptide.pool == "P1" & clone_colour != "gray80" ~ "#440154",
peptide.pool == "P2" & clone_colour != "gray80" ~ "#472d7b",
peptide.pool == "P3" & clone_colour != "gray80" ~ "#3b528b",
peptide.pool == "P4" & clone_colour != "gray80" ~ "#2c728e",
peptide.pool == "P5" & clone_colour != "gray80" ~ "#21918c",
peptide.pool == "P6" & clone_colour != "gray80" ~ "#28ae80",
peptide.pool == "P7" & clone_colour != "gray80" ~ "#5ec962",
peptide.pool == "P8" & clone_colour != "gray80" ~ "#addc30",
peptide.pool == "P9" & clone_colour != "gray80" ~ "#fde725",
.default = "gray80"
))
ggplot(hustle.meta, aes(umap_1, umap_2, label = clonotype_id,), colour = clonotype_id) +
geom_point(size = 3, alpha = 0.8, colour = hustle.meta$clone_colour) +
theme_bw() +
theme(legend.position = "right")
Peptide pool didn’t determine phenotype
ggplot(hustle.meta, aes(umap_1, umap_2, label = peptide.pool, fill = peptide.pool, colour = peptide.pool)) +
geom_point(size = 3, alpha = 0.8) +
theme_bw()
FeaturePlot(hustle, feature = "MKI67", reduction = "umap")
Cell cycle related genes and regressing them out - Before regressing
# A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat. We can
# segregate this list into markers of G2/M phase and markers of S phase
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
# Create our Seurat object and complete the initalization steps
hustle.cc <- CellCycleScoring(hustle, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
RidgePlot(hustle.cc, features = c("PCNA", "TOP2A", "MCM6", "MKI67"), ncol = 2)
# Running a PCA on cell cycle genes reveals, unsurprisingly, that cells separate entirely by
# phase
hustle.cc <- RunPCA(hustle.cc, features = c(s.genes, g2m.genes))
DimPlot(hustle.cc, reduction = "pca")
hustle.cc <- ScaleData(hustle.cc, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(hustle.cc))
hustle.cc <- RunPCA(hustle.cc, features = VariableFeatures(hustle.cc), nfeatures.print = 10)
DimPlot(hustle.cc)
# When running a PCA on only cell cycle genes, cells no longer separate by cell-cycle phase
hustle.cc <- RunPCA(hustle.cc, features = c(s.genes, g2m.genes))
DimPlot(hustle.cc, reduction = "pca")
hustle.cc <- RunTSNE(hustle.cc, dims=1:use.pcs, seed.use = saved.seed, perplexity=80)
hustle.cc <- FindNeighbors(hustle.cc, dims = use.pcs)
hustle.cc <- FindClusters(hustle.cc, resolution = c(0.6))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 871
## Number of edges: 35697
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6005
## Number of communities: 5
## Elapsed time: 0 seconds
hustle.cc <- RunUMAP(hustle.cc, dims = use.pcs)
DimPlot(hustle.cc, group.by = "Phase")
hustle.cc <- RunAzimuth(hustle.cc, reference = "pbmcref")
DimPlot(hustle.cc, group.by = "predicted.celltype.l2")
Same clonotype info after regressing
hustle.cc.meta <- data.frame(hustle.cc@reductions[["umap"]]@cell.embeddings) %>%
tibble::rownames_to_column("Cell") %>%
mutate(clonotype_id = substr(hustle.cc@meta.data$clonotype_id, 9, nchar(hustle.cc@meta.data$clonotype_id))) %>%
mutate(clone_colour = case_when(
clonotype_id == "clonotype1" ~ "cornflowerblue",
clonotype_id == "clonotype2" ~ "cornflowerblue",
clonotype_id == "clonotype4" ~ "salmon",
clonotype_id == "clonotype6" ~ "lightgreen",
clonotype_id == "clonotype9" ~ "navyblue",
clonotype_id == "clonotype24" ~ "darkred",
clonotype_id == "clonotype43" ~ "yellow4",
.default = "gray80"
))
ggplot(hustle.cc.meta, aes(umap_1, umap_2, label = clonotype_id,), colour = clonotype_id) +
geom_point(size = 3, alpha = 0.8, colour = hustle.meta$clone_colour) +
theme_bw() +
theme(legend.position = "right")